import scrapy
from scrapy.http import Request
from blogs.items import BlogsItem

baseUrl = 'http://www.r9it.com'
class R9itSpider(scrapy.Spider):
    """Crawl blog posts on www.r9it.com and yield one ``BlogsItem`` per post."""

    name = 'r9it'
    allowed_domains = ['www.r9it.com']
    start_urls = ['http://www.r9it.com/']

    def parse(self, response):
        """Collect post links from the blog index and schedule detail requests.

        Yields a ``Request`` (handled by :meth:`parse_detail`) for every
        link in the blog list whose href ends in ``.html``.
        """
        urls = response.xpath(
            '//*[@id="app"]//div[@class="blog-list"]//div[@class="title"]/a/@href'
        ).extract()
        for post_url in urls:
            # Fixed: match '.html' (with the dot) so names merely ending in the
            # letters "html" are not followed.  response.urljoin correctly
            # handles both relative and absolute hrefs, unlike the previous
            # blind string concatenation with the baseUrl constant.
            if post_url.endswith('.html'):
                yield Request(response.urljoin(post_url), callback=self.parse_detail)

    def parse_detail(self, response):
        """Extract title, content, tags and view count from a post page.

        Yields a populated ``BlogsItem``.  Note: ``extract_first`` returns
        ``None`` when a node is missing, so downstream pipelines must
        tolerate ``None`` field values.
        """
        item = BlogsItem()
        item['title'] = response.xpath(
            '//*[@class="page-title"]/h1/text()'
        ).extract_first()
        # Only the first <p> text node of the article body is captured —
        # presumably a summary; confirm if full content is required.
        item['content'] = response.xpath(
            '//*[@class="theme-reco-content content__default"]/p/text()'
        ).extract_first()
        # string(.) concatenates nested text nodes inside each tag <span>.
        item['tag'] = response.xpath(
            '//*[@class="page-title"]/div/i[4]/span'
        ).xpath('string(.)').extract()
        item['view_num'] = response.xpath(
            '//*[@class="page-title"]/div/i[3]/span/text()'
        ).extract_first()
        item['type'] = 0  # constant discriminator for this source — verify meaning against pipeline
        yield item